{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 日期的范围、频率以及移动\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from datetime import datetime\n",
    "from datetime import timedelta\n",
    "from dateutil.parser import parse\n",
    "from pandas import DataFrame, Series\n",
    "from pandas.tseries.offsets import Day, Hour, Minute, MonthEnd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dates = [datetime(2011, 1, 2),\n",
    "         datetime(2011, 1, 5),\n",
    "         datetime(2011, 1, 7),\n",
    "         datetime(2011, 1, 8),\n",
    "         datetime(2011, 1, 10),\n",
    "         datetime(2011, 1, 12)]\n",
    "ts = Series(np.random.randn(6), index=dates)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2011-01-02 00:00:00 1.1469586667\n",
      "2011-01-03 00:00:00 nan\n",
      "2011-01-04 00:00:00 nan\n",
      "2011-01-05 00:00:00 -0.675617048673\n",
      "2011-01-06 00:00:00 nan\n",
      "2011-01-07 00:00:00 -0.944340550725\n",
      "2011-01-08 00:00:00 0.787661952498\n",
      "2011-01-09 00:00:00 nan\n",
      "2011-01-10 00:00:00 0.508436289658\n",
      "2011-01-11 00:00:00 nan\n",
      "2011-01-12 00:00:00 -0.623751221526\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel\\__main__.py:1: FutureWarning: \n",
      ".resample() is now a deferred operation\n",
      "You called iteritems(...) on this deferred object which materialized it into a series\n",
      "by implicitly taking the mean.  Use .resample(...).mean() instead\n",
      "  if __name__ == '__main__':\n"
     ]
    }
   ],
   "source": [
    "for k, v in ts.resample('D').iteritems(): # 按日重新采样，填充进去的日期设置为NA。\n",
    "    print(k, v)\n",
    "# .resample() is now a deferred operation，书里面直接就打印了，这里不行。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 生成日期范围"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',\n",
       "               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',\n",
       "               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',\n",
       "               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',\n",
       "               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',\n",
       "               '2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',\n",
       "               '2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',\n",
       "               '2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',\n",
       "               '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',\n",
       "               '2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',\n",
       "               '2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',\n",
       "               '2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',\n",
       "               '2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',\n",
       "               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',\n",
       "               '2012-05-27', '2012-05-28', '2012-05-29', '2012-05-30',\n",
       "               '2012-05-31', '2012-06-01'],\n",
       "              dtype='datetime64[ns]', freq='D')"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "index = pd.date_range('4/1/2012', '6/1/2012')\n",
    "index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',\n",
       "               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',\n",
       "               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',\n",
       "               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',\n",
       "               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20'],\n",
       "              dtype='datetime64[ns]', freq='D')"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.date_range(start='4/1/2012', periods=20) # 20天"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2012-05-13', '2012-05-14', '2012-05-15', '2012-05-16',\n",
       "               '2012-05-17', '2012-05-18', '2012-05-19', '2012-05-20',\n",
       "               '2012-05-21', '2012-05-22', '2012-05-23', '2012-05-24',\n",
       "               '2012-05-25', '2012-05-26', '2012-05-27', '2012-05-28',\n",
       "               '2012-05-29', '2012-05-30', '2012-05-31', '2012-06-01'],\n",
       "              dtype='datetime64[ns]', freq='D')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.date_range(end='6/1/2012', periods=20) # 20天，方向向前"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',\n",
       "               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',\n",
       "               '2000-09-29', '2000-10-31', '2000-11-30'],\n",
       "              dtype='datetime64[ns]', freq='BM')"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.date_range('1/1/2000', '12/1/2000', freq='BM') # BM = business end of month，结果调整到月底"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2012-05-02 12:56:31', '2012-05-03 12:56:31',\n",
       "               '2012-05-04 12:56:31', '2012-05-05 12:56:31',\n",
       "               '2012-05-06 12:56:31'],\n",
       "              dtype='datetime64[ns]', freq='D')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.date_range('5/2/2012 12:56:31', periods=5) # 时分秒保留"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05',\n",
       "               '2012-05-06'],\n",
       "              dtype='datetime64[ns]', freq='D')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.date_range('5/2/2012 12:56:31', periods=5, normalize=True) # 时分秒被和谐"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 频率和日期偏移量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "hour = Hour()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "four_hours = Hour(4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 04:00:00',\n",
       "               '2000-01-01 08:00:00', '2000-01-01 12:00:00',\n",
       "               '2000-01-01 16:00:00', '2000-01-01 20:00:00',\n",
       "               '2000-01-02 00:00:00', '2000-01-02 04:00:00',\n",
       "               '2000-01-02 08:00:00', '2000-01-02 12:00:00',\n",
       "               '2000-01-02 16:00:00', '2000-01-02 20:00:00',\n",
       "               '2000-01-03 00:00:00', '2000-01-03 04:00:00',\n",
       "               '2000-01-03 08:00:00', '2000-01-03 12:00:00',\n",
       "               '2000-01-03 16:00:00', '2000-01-03 20:00:00'],\n",
       "              dtype='datetime64[ns]', freq='4H')"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h') # 以4小时为间隔单位,两边闭区间。如果右面是1/4/2000 00:00，那么符合生成条件。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<150 * Minutes>"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Hour(2) + Minute(30)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:30:00',\n",
       "               '2000-01-01 03:00:00', '2000-01-01 04:30:00',\n",
       "               '2000-01-01 06:00:00', '2000-01-01 07:30:00',\n",
       "               '2000-01-01 09:00:00', '2000-01-01 10:30:00',\n",
       "               '2000-01-01 12:00:00', '2000-01-01 13:30:00'],\n",
       "              dtype='datetime64[ns]', freq='90T')"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.date_range('1/1/2000', periods=10, freq='1h30min')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 时间序列基础频率\n",
    "# D：       每日\n",
    "# B：       每工作日\n",
    "# H：       每小时\n",
    "# T/min：   每分钟\n",
    "# S：       每秒\n",
    "# L/ms：    每毫秒\n",
    "# U：       每微秒\n",
    "# M：       每月最后一个日历日\n",
    "# BM：      每月最后一个工作日\n",
    "# MS：      每月第一个日历日\n",
    "# BNS：     每月第一个工作日\n",
    "# W-MON：   每月从指定的星期几开始算起\n",
    "# WOM-1MON：产生每月的第一、第二、第x周的周几。WOM-3FRI表示每月第三个星期五。\n",
    "# Q-JAN：   对于以指定月份结束的年度，每季度最后一月的最后一个日历日。\n",
    "# BQ-JAN：  对于以指定月份结束的年度，每季度最后一月的最后一个工作日。\n",
    "# QS-JAN：  对于以指定月份结束的年度，每季度最后一月的第一个日历日。\n",
    "# BQS-JAN： 对于以指定月份结束的年度，每季度最后一月的第一个工作日。\n",
    "# A-JAN：   每年指定月份的最后一个日历日 \n",
    "# BA-JAN：  每年指定月份的最后一个工作日\n",
    "# AS-JAN：  每年指定月份的第一个日历日\n",
    "# BAS-JAN： 每年指定月份的第一个工作日"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# WOM日期（Week Of Month）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2017-10-20', '2017-11-17', '2017-12-15'], dtype='datetime64[ns]', freq='WOM-3FRI')"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rng = pd.date_range('10/1/2017', '12/31/2017', freq='WOM-3FRI') # 每月的第三个周五\n",
    "rng"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 移动（超前和滞后）数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2000-01-31   -1.183179\n",
       "2000-02-29    0.215973\n",
       "2000-03-31   -3.088165\n",
       "2000-04-30    0.245773\n",
       "Freq: M, dtype: float64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts = Series(np.random.randn(4),\n",
    "            index=pd.date_range('1/1/2000', periods=4, freq='M'))\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2000-01-31         NaN\n",
       "2000-02-29         NaN\n",
       "2000-03-31   -1.183179\n",
       "2000-04-30    0.215973\n",
       "Freq: M, dtype: float64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts.shift(2) # 数据整体往后推2步， key不动。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2000-01-31   -3.088165\n",
       "2000-02-29    0.245773\n",
       "2000-03-31         NaN\n",
       "2000-04-30         NaN\n",
       "Freq: M, dtype: float64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts.shift(-2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2000-01-31          NaN\n",
       "2000-02-29    -1.182536\n",
       "2000-03-31   -15.298850\n",
       "2000-04-30    -1.079585\n",
       "Freq: M, dtype: float64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts / ts.shift(1) - 1 # 相对于前一条记录的变化率，比如股票涨跌幅。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2000-03-31   -1.183179\n",
       "2000-04-30    0.215973\n",
       "2000-05-31   -3.088165\n",
       "2000-06-30    0.245773\n",
       "Freq: M, dtype: float64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts.shift(2, freq='M') # 指定freq后调整key，加2个月。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2000-02-03   -1.183179\n",
       "2000-03-03    0.215973\n",
       "2000-04-03   -3.088165\n",
       "2000-05-03    0.245773\n",
       "dtype: float64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts.shift(3, freq='D')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2000-02-03   -1.183179\n",
       "2000-03-03    0.215973\n",
       "2000-04-03   -3.088165\n",
       "2000-05-03    0.245773\n",
       "dtype: float64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts.shift(1, freq='3D') # 3D等价3天"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 通过偏移量对日期进行位移"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2011-11-20 00:00:00')"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "now = datetime(2011, 11, 17)\n",
    "now + 3 * Day()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2011-11-30 00:00:00')"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "now + MonthEnd() # 移动到月底"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2011-12-31 00:00:00')"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "now + MonthEnd(2) # 推到下个月月底"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2011-11-30 00:00:00\n",
      "2011-10-31 00:00:00\n"
     ]
    }
   ],
   "source": [
    "offset = MonthEnd()\n",
    "print(offset.rollforward(now)) # 等价now + MonthEnd()\n",
    "print(offset.rollback(now)) # 移动到now之前那个月的月底"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2000-01-31   -0.064773\n",
       "2000-02-29    0.430363\n",
       "2000-03-31   -0.073221\n",
       "dtype: float64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts = Series(np.random.randn(20),\n",
    "            index=pd.date_range('1/15/2000', periods=20, freq='4d'))\n",
    "ts.groupby(offset.rollforward).mean() # 根据rollforward的结果分组并统计"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel\\__main__.py:1: FutureWarning: how in .resample() is deprecated\n",
      "the new syntax is .resample(...).mean()\n",
      "  if __name__ == '__main__':\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "2000-01-31   -0.064773\n",
       "2000-02-29    0.430363\n",
       "2000-03-31   -0.073221\n",
       "Freq: M, dtype: float64"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts.resample('M', how='mean') # 更粗暴直接的做法"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
